Hands-on Exercise 6: Spatial Weights and Applications

Published

December 1, 2023

Modified

December 2, 2023

Objective

Data Set

Getting Started

pacman::p_load(tmap, sf, DT, stplanr,
               performance,
               ggpubr, tidyverse)

Import shapefile (geospatial) into R - busstop and MPSZ

busstop <- st_read(dsn = "data/geospatial",
                   layer = "BusStop") %>%
  st_transform(crs = 3414)
Reading layer `BusStop' from data source 
  `C:\cftoh\ISSS624\Hands-on_Ex\Hands-on_Ex6\data\geospatial' 
  using driver `ESRI Shapefile'
Simple feature collection with 5161 features and 3 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 3970.122 ymin: 26482.1 xmax: 48284.56 ymax: 52983.82
Projected CRS: SVY21
mpsz <- st_read(dsn = "data/geospatial",
                   layer = "MPSZ-2019") %>%
  st_transform(crs = 3414)
Reading layer `MPSZ-2019' from data source 
  `C:\cftoh\ISSS624\Hands-on_Ex\Hands-on_Ex6\data\geospatial' 
  using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS:  WGS 84
mpsz <- write_rds(mpsz, "data/rds/mpsz.rds")

Import csv (aspatial) into R (The Origin Destination, OD data)

odbus <- read_csv("data/aspatial/origin_destination_bus_202308.csv")
glimpse(odbus)
Rows: 5,709,512
Columns: 7
$ YEAR_MONTH          <chr> "2023-08", "2023-08", "2023-08", "2023-08", "2023-…
$ DAY_TYPE            <chr> "WEEKDAY", "WEEKENDS/HOLIDAY", "WEEKENDS/HOLIDAY",…
$ TIME_PER_HOUR       <dbl> 16, 16, 14, 14, 17, 17, 17, 17, 7, 17, 14, 10, 10,…
$ PT_TYPE             <chr> "BUS", "BUS", "BUS", "BUS", "BUS", "BUS", "BUS", "…
$ ORIGIN_PT_CODE      <chr> "04168", "04168", "80119", "80119", "44069", "4406…
$ DESTINATION_PT_CODE <chr> "10051", "10051", "90079", "90079", "17229", "1722…
$ TOTAL_TRIPS         <dbl> 7, 2, 3, 10, 5, 4, 3, 22, 3, 3, 7, 1, 3, 1, 3, 1, …

For the purpose of this exercise, we will extract commuting flows on weekday and between 6 and 9 o’clock.

odbus6_9 <- odbus %>%
  filter(DAY_TYPE == "WEEKDAY") %>%
  filter(TIME_PER_HOUR >= 6 &
           TIME_PER_HOUR <= 9) %>%
  group_by(ORIGIN_PT_CODE,
           DESTINATION_PT_CODE) %>%
  summarise(TRIPS = sum(TOTAL_TRIPS))

Content of odbus6_9

datatable(odbus6_9)

Save the data for future use (in rds format)

write_rds(odbus6_9, "data/rds/odbus6_9.rds")

Can use the following to read the data

odbus6_9 <- read_rds("data/rds/odbus6_9.rds")

Data Wrangling

Combining the two geospatial data:

busstop_mpsz <- st_intersection(busstop, mpsz) %>%
  select(BUS_STOP_N, SUBZONE_C) %>%
  st_drop_geometry()
datatable(busstop_mpsz)

Save data for future use

write_rds(busstop_mpsz, "data/rds/busstop_mpsz.rds")  

Performing the Relational Join (to update attribute table of one geospatial with another aspatial data set)

od_data <- left_join(odbus6_9 , busstop_mpsz,
            by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
  rename(ORIGIN_BS = ORIGIN_PT_CODE,
         ORIGIN_SZ = SUBZONE_C,
         DESTIN_BS = DESTINATION_PT_CODE)

Data Sanity check - for duplicate records

duplicate <- od_data %>%
  group_by_all() %>%
  filter(n()>1) %>%
  ungroup()

Noted 1180 duplicate record, so use the following to retain unique records

od_data <- unique(od_data)

Next is to update the dataframe with planning subzone codes from the busstop_mpsz

od_data <- left_join(od_data , busstop_mpsz,
            by = c("DESTIN_BS" = "BUS_STOP_N")) 

duplicate <- od_data %>%
  group_by_all() %>%
  filter(n()>1) %>%
  ungroup()

od_data <- unique(od_data)

od_data <- od_data %>%
  rename(DESTIN_SZ = SUBZONE_C) %>%
  drop_na() %>%
  group_by(ORIGIN_SZ, DESTIN_SZ) %>%
  summarise(MORNING_PEAK = sum(TRIPS))

Save and read the file

write_rds(od_data, "data/rds/od_data.rds")

od_data <- read_rds("data/rds/od_data.rds")

Visualising Spatial Interaction

Removing intra-zonal flows

od_data1 <- od_data[od_data$ORIGIN_SZ!=od_data$DESTIN_SZ,]

Creating desire lines

flowLine <- od2line(flow = od_data1, 
                    zones = mpsz,
                    zone_code = "SUBZONE_C")

Visualising the desire lines

tmap_options(check.and.fix = TRUE)
tm_shape(mpsz) +
  tm_polygons() +
  flowLine %>%  
  tm_shape() +
  tm_lines(lwd = "MORNING_PEAK",
           style = "quantile",
           scale = c(0.1, 1, 3, 5, 7, 10),
           n = 6,
           alpha = 0.3)

When the flow data are very messy and highly skewed , it is wiser to focus on selected flows, for example flow greater than or equal to 5000 as shown below.

tm_shape(mpsz) +
  tm_polygons() +
flowLine %>%  
  filter(MORNING_PEAK >= 5000) %>%
tm_shape() +
  tm_lines(lwd = "MORNING_PEAK",
           style = "quantile",
           scale = c(0.1, 1, 3, 5, 7, 10),
           n = 6,
           alpha = 0.3)